package com.datahole.suffixarray.util;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.datahole.suffixarray.util.CharMapper.CharType;

/**
 * Static helpers for inspecting and normalizing text (character-class checks,
 * punctuation/digit stripping, display normalization), plus a few date-string
 * and phrase-similarity utilities.
 * <p>
 * Character classification is delegated to {@link CharMapper}; dates are
 * handled as {@code yyyyMMdd} strings in the JVM default time zone.
 * 
 * @author hlyue
 * 
 */
public class StringUtil {

	private static final Logger logger = Logger.getLogger(StringUtil.class);

	/** Pattern of every date string accepted or produced by this class. */
	private static final String DATE_FORMAT = "yyyyMMdd";

	// Patterns are immutable and thread-safe, so they are compiled once.
	private static final Pattern DIGITS = Pattern.compile("[0-9]{1,}");
	private static final Pattern LETTERS = Pattern.compile("[a-zA-Z]{1,}");
	private static final Pattern LETTERS_OR_DIGITS = Pattern
			.compile("[a-zA-Z0-9]{1,}");
	private static final Pattern WEIBO_MARKUP = Pattern
			.compile("回复@([^:]+?):|\\[转发\\]|@([^:]+?):");

	/**
	 * Tells whether the string consists only of ASCII digits.
	 * 
	 * @param str
	 *            the string to test, may be {@code null}
	 * @return {@code true} if {@code str} is one or more characters from
	 *         {@code 0-9}; {@code false} for {@code null} or the empty string
	 */
	public static boolean isNumberic(String str) {
		return matchesWhole(DIGITS, str);
	}

	/**
	 * Tells whether the string consists only of punctuation characters.
	 * Leading and trailing whitespace is ignored.
	 * 
	 * @param input
	 *            the string to test, may be {@code null}
	 * @return {@code false} for {@code null} or if any character of the
	 *         trimmed input is rejected by {@link CharMapper#isDot}; note that
	 *         an empty or whitespace-only input yields {@code true}
	 */
	public static boolean isDotString(String input) {
		if (input == null)
			return false;
		String text = input.trim();
		for (int i = 0; i < text.length(); i++) {
			if (!CharMapper.isDot(text.charAt(i)))
				return false;
		}
		return true;
	}

	/**
	 * Tells whether the string contains at least one character that
	 * {@link CharMapper} classifies as {@code CharType.Japanese}.
	 * 
	 * @param input
	 *            the string to test, may be {@code null}
	 * @return {@code true} if such a character is present; {@code false}
	 *         otherwise, including for {@code null}
	 */
	public static boolean containJapanese(String input) {
		return containsType(input, CharType.Japanese);
	}

	/**
	 * Tells whether the string contains at least one character that
	 * {@link CharMapper} classifies as {@code CharType.Chinese}.
	 * 
	 * @param input
	 *            the string to test, may be {@code null}
	 * @return {@code true} if such a character is present; {@code false}
	 *         otherwise, including for {@code null}
	 */
	public static boolean containChinese(String input) {
		return containsType(input, CharType.Chinese);
	}

	/**
	 * Removes the run of digit characters at the start of the string.
	 * 
	 * @param input
	 *            the string to process, may be {@code null}
	 * @return the input without its leading {@code CharType.Digit}
	 *         characters (empty if the input is all digits), or {@code null}
	 *         if the input is {@code null}
	 */
	public static String removeFrontNum(String input) {
		if (input == null)
			return null;
		// Start at 0: the cut position is exactly the number of leading digits.
		int start = 0;
		while (start < input.length()
				&& CharMapper.getType(input.charAt(start)) == CharType.Digit) {
			start++;
		}
		return input.substring(start);
	}

	/**
	 * Removes the run of digit characters at the end of the string.
	 * 
	 * @param input
	 *            the string to process, may be {@code null}
	 * @return the input without its trailing {@code CharType.Digit}
	 *         characters (empty if the input is all digits), or {@code null}
	 *         if the input is {@code null}
	 */
	public static String removeBackNum(String input) {
		if (input == null)
			return null;
		// Scan backwards instead of reversing the string twice.
		int end = input.length();
		while (end > 0
				&& CharMapper.getType(input.charAt(end - 1)) == CharType.Digit) {
			end--;
		}
		return input.substring(0, end);
	}

	/**
	 * Locates the part of the string that remains after skipping the leading
	 * and the trailing run of digit characters.
	 * 
	 * @param input
	 *            the string to inspect, may be {@code null}
	 * @return a two-element array: element 0 is the number of leading
	 *         {@code CharType.Digit} characters, element 1 is the input
	 *         length minus the number of trailing ones; {@code null} if the
	 *         input is {@code null}. For an all-digit input both runs cover
	 *         the whole string, so element 0 is the length and element 1 is 0.
	 */
	public static int[] validateSubString(String input) {
		if (input == null)
			return null;
		int length = input.length();

		int leading = 0;
		while (leading < length
				&& CharMapper.getType(input.charAt(leading)) == CharType.Digit) {
			leading++;
		}

		int trailing = 0;
		while (trailing < length
				&& CharMapper.getType(input.charAt(length - 1 - trailing)) == CharType.Digit) {
			trailing++;
		}

		return new int[] { leading, length - trailing };
	}

	/**
	 * Normalizes text for matching. The input is trimmed, then:
	 * <ol>
	 * <li>each run of punctuation becomes a single {@code "."}; punctuation
	 * at the very start or end is dropped;</li>
	 * <li>English letters go through {@link CharMapper#toLowerCase}, with a
	 * {@code "."} inserted where an English letter directly follows a Chinese
	 * character;</li>
	 * <li>Chinese characters go through {@link CharMapper#simp2trad}, with a
	 * {@code "."} inserted where a Chinese character directly follows an
	 * English letter;</li>
	 * <li>{@code CharType.DigitExt} characters go through
	 * {@link CharMapper#toArab};</li>
	 * <li>everything else goes through {@link CharMapper#toDBCCase}.</li>
	 * </ol>
	 * NOTE(review): the original comments describe step 3 as
	 * traditional-to-simplified conversion although the helper is named
	 * {@code simp2trad} - confirm the direction against {@code CharMapper}.
	 * 
	 * @param input
	 *            the text to normalize, may be {@code null}
	 * @return the normalized text (empty for punctuation-only input), or
	 *         {@code null} if the input is {@code null}
	 */
	public static String toNormal(String input) {
		if (input == null)
			return null;
		String text = input.trim();
		int len = text.length();
		StringBuilder sb = new StringBuilder(len);
		CharType prev = null; // type of the previous character, null at i == 0
		for (int i = 0; i < len; i++) {
			char c = text.charAt(i);
			CharType type = CharMapper.getType(c);
			if (type == CharType.Dot) {
				// Only the first mark of a run emits ".", and never at index 0.
				if (i > 0 && prev != CharType.Dot)
					sb.append(".");
			} else if (type == CharType.English) {
				char lower = CharMapper.toLowerCase(c);
				if (prev == CharType.Chinese)
					sb.append(".");
				sb.append(lower);
			} else if (type == CharType.Chinese) {
				if (prev == CharType.English)
					sb.append(".");
				sb.append(CharMapper.simp2trad(c));
			} else if (type == CharType.DigitExt) {
				sb.append(CharMapper.toArab(c));
			} else {
				sb.append(CharMapper.toDBCCase(c));
			}
			prev = type;
		}
		// Drop the "." emitted for a trailing punctuation run. The buffer is
		// empty when the whole input was punctuation, so guard the delete.
		if (prev == CharType.Dot && sb.length() > 0)
			sb.deleteCharAt(sb.length() - 1);
		return sb.toString();
	}

	/**
	 * Converts text to its display form. The input is trimmed, then:
	 * <ol>
	 * <li>the first mark of each punctuation run goes through
	 * {@link CharMapper#toEnPun}; the rest of the run is dropped;</li>
	 * <li>Chinese characters go through {@link CharMapper#simp2trad}, except
	 * the numerals accepted by {@link #isChineseDigit(char)}, which are kept
	 * as they are;</li>
	 * <li>{@code CharType.DigitExt} characters go through
	 * {@link CharMapper#toArab};</li>
	 * <li>everything else goes through {@link CharMapper#toDBCCase}.</li>
	 * </ol>
	 * 
	 * @param input
	 *            the text to convert, may be {@code null}
	 * @return the display form, or {@code null} if the input is {@code null}
	 */
	public static String toShow(String input) {
		if (input == null)
			return null;
		String text = input.trim();
		int len = text.length();
		StringBuilder sb = new StringBuilder(len);
		CharType prev = null; // type of the previous character, null at i == 0
		for (int i = 0; i < len; i++) {
			char c = text.charAt(i);
			CharType type = CharMapper.getType(c);
			if (type == CharType.Dot) {
				if (i == 0 || prev != CharType.Dot)
					sb.append(CharMapper.toEnPun(c));
			} else {
				appendDisplayForm(sb, c, type);
			}
			prev = type;
		}
		return sb.toString();
	}

	/**
	 * Converts text to a standard form. Works like {@link #toShow(String)}
	 * except that every punctuation character is converted with
	 * {@link CharMapper#toEnPun}; runs of punctuation are not collapsed.
	 * 
	 * @param input
	 *            the text to convert, may be {@code null}
	 * @return the standard form, or {@code null} if the input is {@code null}
	 */
	public static String toStandard(String input) {
		if (input == null)
			return null;
		String text = input.trim();
		StringBuilder sb = new StringBuilder(text.length());
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			CharType type = CharMapper.getType(c);
			if (type == CharType.Dot)
				sb.append(CharMapper.toEnPun(c));
			else
				appendDisplayForm(sb, c, type);
		}
		return sb.toString();
	}

	/**
	 * Tells whether the character is one of the Chinese numerals for zero
	 * and one through nine.
	 * 
	 * @param c
	 *            the character to test
	 * @return {@code true} if {@code c} is one of the ten numerals listed in
	 *         the switch below
	 */
	public static boolean isChineseDigit(char c) {
		switch (c) {
		case '一':
		case '二':
		case '三':
		case '四':
		case '五':
		case '六':
		case '七':
		case '八':
		case '九':
		case '零':
			return true;
		default:
			return false;
		}
	}

	/**
	 * Removes every punctuation character from the string.
	 * 
	 * @param input
	 *            the string to process, may be {@code null}
	 * @return the input without its {@code CharType.Dot} characters, or
	 *         {@code null} if the input is {@code null}
	 */
	public static String removeDots(String input) {
		if (input == null)
			return null;
		StringBuilder sb = new StringBuilder(input.length());
		for (int i = 0; i < input.length(); i++) {
			char c = input.charAt(i);
			if (CharMapper.getType(c) != CharType.Dot)
				sb.append(c);
		}
		return sb.toString();
	}

	/**
	 * Applies {@link CharMapper#simp2trad} to every character of the string.
	 * <p>
	 * NOTE(review): the original comment describes this as
	 * traditional-to-simplified conversion, which contradicts the method
	 * name - confirm the direction against {@code CharMapper}.
	 * 
	 * @param input
	 *            the string to convert, may be {@code null}
	 * @return the converted string, or {@code null} if the input is
	 *         {@code null}
	 */
	public static String simp2Trad(String input) {
		if (input == null)
			return null;
		StringBuilder sb = new StringBuilder(input.length());
		for (int i = 0; i < input.length(); i++) {
			sb.append(CharMapper.simp2trad(input.charAt(i)));
		}
		return sb.toString();
	}

	/**
	 * Tells whether the string consists only of ASCII letters.
	 * 
	 * @param str
	 *            the string to test, may be {@code null}
	 * @return {@code true} if {@code str} is one or more characters from
	 *         {@code a-z} or {@code A-Z}; {@code false} for {@code null} or
	 *         the empty string
	 */
	public static boolean isENString(String str) {
		return matchesWhole(LETTERS, str);
	}

	/**
	 * Strips Weibo forwarding/reply markup from a message: every
	 * {@code 回复@name:} prefix, every {@code [转发]} tag and every
	 * {@code @name:} mention is removed, where {@code name} is the shortest
	 * non-empty run of characters up to the next ASCII colon.
	 * 
	 * @param str
	 *            the raw message, may be {@code null}
	 * @return the message without the markup, or {@code null} if the input is
	 *         {@code null}
	 */
	public static String filterStr(String str) {
		if (str == null)
			return null;
		Matcher matcher = WEIBO_MARKUP.matcher(str);
		return matcher.replaceAll("");
	}

	/**
	 * Tells whether the string consists only of ASCII letters and digits.
	 * 
	 * @param str
	 *            the string to test, may be {@code null}
	 * @return {@code true} if {@code str} is one or more characters from
	 *         {@code a-z}, {@code A-Z} or {@code 0-9}; {@code false} for
	 *         {@code null} or the empty string
	 */
	public static boolean isNumbericAndEn(String str) {
		return matchesWhole(LETTERS_OR_DIGITS, str);
	}

	/**
	 * Tells whether the string contains a character that is neither Chinese,
	 * nor a digit, nor English according to {@link CharMapper}.
	 * 
	 * @param input
	 *            the string to test, may be {@code null}
	 * @return {@code true} if at least one such character exists;
	 *         {@code false} otherwise, including for {@code null} and the
	 *         empty string
	 */
	public static boolean isNotChDiEnStr(String input) {
		if (input == null)
			return false;
		for (int i = 0; i < input.length(); i++) {
			char c = input.charAt(i);
			boolean accepted = CharMapper.isChinese(c)
					|| CharMapper.isDigit(c) || CharMapper.isEnglish(c);
			if (!accepted)
				return true;
		}
		return false;
	}

	/**
	 * Renders the stack trace of a throwable as a string.
	 * 
	 * @param t
	 *            the throwable to render, must not be {@code null}
	 * @return the text {@link Throwable#printStackTrace(PrintWriter)} writes
	 */
	public static String getTrace(Throwable t) {
		StringWriter buffer = new StringWriter();
		PrintWriter writer = new PrintWriter(buffer);
		t.printStackTrace(writer);
		writer.flush();
		return buffer.toString();
	}

	/**
	 * Lists the days between two {@code yyyyMMdd} dates.
	 * <p>
	 * The arguments may be given in either order (they are ordered by string
	 * comparison). The result is:
	 * <ul>
	 * <li>the single date, if both arguments are equal;</li>
	 * <li>the days strictly between the two dates, if there are any;</li>
	 * <li>otherwise the two dates themselves, earlier one first.</li>
	 * </ul>
	 * Days are advanced with calendar arithmetic rather than by adding 24
	 * hours, so days of unusual length (daylight-saving changes) cannot make
	 * the iteration repeat a date.
	 * 
	 * @param date1
	 *            one end of the range, must not be {@code null}
	 * @param date2
	 *            the other end of the range, must not be {@code null}
	 * @return the list described above, never {@code null}
	 * @throws IllegalArgumentException
	 *             if the two dates differ and either cannot be parsed as
	 *             {@code yyyyMMdd}
	 */
	public static List<String> getDateList(String date1, String date2) {
		List<String> dateList = new ArrayList<String>();
		if (date1.equals(date2)) {
			logger.info("两个日期相等!");
			dateList.add(date1);
			return dateList;
		}

		// Make sure "first" is not later than "last".
		String first = date1;
		String last = date2;
		if (first.compareTo(last) > 0) {
			first = date2;
			last = date1;
		}

		// SimpleDateFormat is not thread-safe, so use a per-call instance.
		SimpleDateFormat df = new SimpleDateFormat(DATE_FORMAT);
		Calendar cursor = Calendar.getInstance();
		cursor.setTime(parseDay(df, first));
		// Reject an unparsable upper bound up front: the loop below compares
		// strings and could otherwise run for a very long time.
		parseDay(df, last);

		cursor.add(Calendar.DATE, 1);
		String day = df.format(cursor.getTime());
		while (day.compareTo(last) < 0) {
			dateList.add(day);
			cursor.add(Calendar.DATE, 1);
			day = df.format(cursor.getTime());
		}

		if (dateList.isEmpty()) {
			dateList.add(first);
			dateList.add(last);
		}
		return dateList;
	}

	/**
	 * Returns the date that lies {@code num} days before {@code date}.
	 * 
	 * @param date
	 *            the reference date as {@code yyyyMMdd}
	 * @param num
	 *            number of days to go back; a negative value goes forward
	 * @return the resulting date as {@code yyyyMMdd}
	 * @throws IllegalArgumentException
	 *             if {@code date} is {@code null} or cannot be parsed
	 */
	public static String subDateByNum(String date, int num) {
		if (date == null)
			throw new IllegalArgumentException("date must not be null");
		SimpleDateFormat df = new SimpleDateFormat(DATE_FORMAT);
		Calendar cal = Calendar.getInstance();
		cal.setTime(parseDay(df, date));
		cal.add(Calendar.DATE, -num);
		return df.format(cal.getTime());
	}

	/**
	 * Computes the similarity of two short phrases as the mean of the two
	 * directional scores {@code sc(A, B)} and {@code sc(B, A)}. Reference
	 * algorithm: http://www.doc88.com/p-674168364219.html
	 * 
	 * @param A
	 *            first phrase, must not be {@code null}
	 * @param B
	 *            second phrase, must not be {@code null}
	 * @return the similarity score; {@code NaN} if either phrase is empty,
	 *         because the normalization then divides zero by zero
	 */
	public static float similarity(String A, String B) {
		return (sc(A, B) + sc(B, A)) / 2;
	}

	/**
	 * Computes the similarity of phrase A relative to phrase B: the average,
	 * over all characters of A, of their contribution {@link #cc}.
	 * 
	 * @param A
	 *            phrase whose characters are scored
	 * @param B
	 *            phrase they are matched against
	 * @return the directional score, or {@code NaN} if A or B is empty
	 */
	private static float sc(String A, String B) {
		float total = 0.0f;
		for (int i = 0; i < A.length(); i++) {
			total += cc(A, i, B);
		}
		return total / A.length(); // normalize by the length of A
	}

	/**
	 * Computes the contribution of character {@code A[i]} to the score:
	 * {@code (|B| - offset) / |B|}, where the offset comes from
	 * {@link #posOffset}.
	 * 
	 * @param A
	 *            phrase containing the character
	 * @param i
	 *            index of the character in A
	 * @param B
	 *            phrase it is matched against
	 * @return the contribution of {@code A[i]}
	 */
	private static float cc(String A, int i, String B) {
		return (B.length() - posOffset(A, i, B)) / (float) B.length();
	}

	/**
	 * Finds the smallest distance {@code j} such that {@code B[i - j]} or
	 * {@code B[i + j]} equals {@code A[i]}.
	 * 
	 * @param A
	 *            phrase containing the character
	 * @param i
	 *            index of the character in A
	 * @param B
	 *            phrase that is searched
	 * @return the smallest matching distance, or the length of B if the
	 *         character does not occur within reach
	 */
	private static int posOffset(String A, int i, String B) {
		int lenB = B.length();
		char target = A.charAt(i);
		for (int j = 0; j < lenB; j++) {
			int left = i - j;
			if (left >= 0 && left < lenB && target == B.charAt(left))
				return j;
			int right = i + j;
			if (right < lenB && target == B.charAt(right))
				return j;
		}
		return lenB;
	}

	/**
	 * Replaces every punctuation character of the string with an ASCII comma.
	 * 
	 * @param input
	 *            the string to process, may be {@code null}
	 * @return the input with each {@code CharType.Dot} character replaced by
	 *         {@code ','}, or {@code null} if the input is {@code null}
	 */
	public static String puncToComma(String input) {
		if (input == null)
			return null;
		StringBuilder sb = new StringBuilder(input.length());
		for (int i = 0; i < input.length(); i++) {
			char c = input.charAt(i);
			sb.append(CharMapper.getType(c) == CharType.Dot ? ',' : c);
		}
		return sb.toString();
	}

	/** Returns whether the whole of a non-null string matches the pattern. */
	private static boolean matchesWhole(Pattern pattern, String str) {
		return str != null && pattern.matcher(str).matches();
	}

	/** Returns whether the trimmed input has a character of the given type. */
	private static boolean containsType(String input, CharType wanted) {
		if (input == null)
			return false;
		String text = input.trim();
		for (int i = 0; i < text.length(); i++) {
			if (CharMapper.getType(text.charAt(i)) == wanted)
				return true;
		}
		return false;
	}

	/** Appends the display form of one non-punctuation character. */
	private static void appendDisplayForm(StringBuilder sb, char c,
			CharType type) {
		if (type == CharType.Chinese) {
			if (isChineseDigit(c))
				sb.append(c);
			else
				sb.append(CharMapper.simp2trad(c));
		} else if (type == CharType.DigitExt) {
			sb.append(CharMapper.toArab(c));
		} else {
			sb.append(CharMapper.toDBCCase(c));
		}
	}

	/** Parses a date string, reporting failure as IllegalArgumentException. */
	private static Date parseDay(SimpleDateFormat df, String day) {
		try {
			return df.parse(day);
		} catch (ParseException e) {
			throw new IllegalArgumentException("Not a " + DATE_FORMAT
					+ " date: " + day, e);
		}
	}
}
